#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache Licence
@file: find_not_crawl_company.py
@time: 2018/1/18 13:15
"""

import sys

sys.path.append('..')
sys.path.append('../..')

from common.mongo import MongDb
from logger import Logger

# NOTE(review): hosts and credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.

# Settings for the `company_data` database (used to build `target_db`).
MONGO_DB_TARGET = dict(
    host='172.16.215.2',
    port=40042,
    db='company_data',
    username='work',
    password='haizhi',
)

# Settings for the `app_data` database (used to build `source_db`).
MONGO_CONF = dict(
    host='172.16.215.16',
    port=40042,
    db='app_data',
    username='work',
    password='haizhi',
)

# Logging module: one logger, written to find_not_crawl_company.log, shared
# by both database handles below.
log = Logger('find_not_crawl_company.log').get_logger()

# Handle on the database described by MONGO_CONF; main() queries it to check
# whether a company already has a record.
source_db = MongDb(
    MONGO_CONF['host'],
    MONGO_CONF['port'],
    MONGO_CONF['db'],
    MONGO_CONF['username'],
    MONGO_CONF['password'],
    log=log
)

# Handle on the database described by MONGO_DB_TARGET; main() both reads the
# input table from it and writes the result table to it.
target_db = MongDb(
    MONGO_DB_TARGET['host'],
    MONGO_DB_TARGET['port'],
    MONGO_DB_TARGET['db'],
    MONGO_DB_TARGET['username'],
    MONGO_DB_TARGET['password'],
    log=log
)


def main():
    """Copy companies that have no crawled record into a separate table.

    Every document of ``new_registed_zhejiang`` is read through
    ``target_db``.  Its ``_id`` is looked up as the ``company`` field of
    ``enterprise_data_gov`` through ``source_db``; documents without a match
    are written to ``zhejiang_not_crawl`` through ``target_db`` in batches.

    Progress is logged after every ``log_interval`` examined documents and
    once more when the traversal finishes.
    """
    source_table = 'new_registed_zhejiang'
    target_table = 'zhejiang_not_crawl'
    app_data_table = 'enterprise_data_gov'
    batch_size = 500
    log_interval = 1000

    pending = []  # unmatched documents waiting to be written
    count = 0     # documents with no match in app_data_table
    total = 0     # documents examined so far

    for item in target_db.traverse_batch(source_table, {}):
        total += 1
        _id = item.get('_id')

        if source_db.find_one(app_data_table, {'company': _id}) is None:
            count += 1
            pending.append(item)
            if len(pending) >= batch_size:
                target_db.insert_batch_data(target_table, pending, insert=True)
                # Clear in place so the same list object is reused.
                del pending[:]

        # Checked for every examined document.  Previously this sat behind a
        # `continue`, so it was skipped whenever the N-thousandth document
        # already had a match and progress output became sporadic.
        if total % log_interval == 0:
            log.info("当前处理进度: total = {} count = {}".format(total, count))

    # Flush the last, partially filled batch.
    if pending:
        target_db.insert_batch_data(target_table, pending, insert=True)
        del pending[:]

    # Report the final totals unless the loop just logged these same numbers.
    if total % log_interval != 0:
        log.info("当前处理进度: total = {} count = {}".format(total, count))


# Run the job only when this file is executed as a script.  Note that the
# module-level logger and database handles above are still created on import.
if __name__ == '__main__':
    main()
